Files_Replicate_Analysis/Chapter 3 examples/All Examples Combined - Chapter 3.R

# 1. Glioblastoma Example ----------------------------------------------------
## 1.1 Load data and Packages ----
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")

#BiocManager::install("curatedBreastData")
library("survminer")

#https://bioconnector.github.io/workshops/r-survival.html

# Load the bioconductor installer.
# Try http:// if https:// doesn't work.

#BiocManager::install("RTCGA", lib = "C:/Users/phili/Documents/R/win-library/4.0")
# Install the main RTCGA package
library("RTCGA")
#BiocManager::install("RTCGA.clinical")
# Create the clinical data


#https://bioconnector.github.io/workshops/r-survival.html#tcga
#Survival data.
library("RTCGA.clinical")
library("survival")
library("bshazard")
library("muhaz")
library("survHE")
library("sfsmisc")
library("PiecewiseChangepoint")
#devtools::load_all()


clinfinal <- survivalTCGA(GBM.clinical,
                          extract.cols="admin.disease_code")

clinfinal$time <- clinfinal$times/365.25
clinfinal[which(clinfinal$time ==0),"time"] <- 0.001
clinfinal$status <- clinfinal$patient.vital_status

surv.fit_glio <- survfit(Surv(time, status)~1, data=clinfinal)

survminer::ggsurvplot(surv.fit_glio)


## 1.2 Fit Collapsing Model ----

Collapsing_Model <- collapsing.model(clinfinal,
                                     n.iter = 20750,
                                     burn_in = 750,
                                     n.chains = 2,
                                    timescale = "years")



## 1.3 Get the various Hazard Functions ----
clinfinal <- clinfinal %>% mutate(enter = 0)

bshazard.fit <- bshazard(formula = Surv(enter, time,
                                        status) ~ 1,
                         data = clinfinal,
                         nbin = length(unique(clinfinal[which(clinfinal$status == 1),"time"])))


plot(bshazard.fit)

bshazard.df  <- data.frame(time = bshazard.fit[["time"]],
                           hazard = bshazard.fit[["hazard"]],
                           upper.ci = bshazard.fit[["upper.ci"]],
                           lower.ci = bshazard.fit[["lower.ci"]])



harzard.plt.mod <- function(time.vec, cens.vec, Outcome = "Survival",
                            lrg.haz.int = 1,  bw.method = "local"){

  #Plot the hazards
  max.time <- max(time.vec)
  result.hazard.pe.lrg <- pehaz(time.vec, cens.vec, width= lrg.haz.int, max.time=max.time)

  result.smooth <- muhaz(time.vec, cens.vec, b.cor="left",
                         max.time=max.time, bw.method = bw.method)

  #Plot the Survival function

  plot(result.hazard.pe.lrg,  col="black", main = paste0("Hazards for ", Outcome))
  #result.hazard.pe.sml <- pehaz(time.vec, cens.vec, width=sml.haz.int.inv, max.time=max.time)
  #lines(result.hazard.pe.sml,  col="blue")

  lines(result.smooth, col = "red", lty= 2)
  legend("topright", legend = c(paste0(lrg.haz.int," year hazard step function"),
                                #paste0(sml.haz.int," month hazard step function"),
                                "Smoothed hazard function"),
         col = c("black",
                 #"blue",
                 "red"),
         lty = c(1,
                 #2,
                 2), cex = 0.8)

  return(result.hazard.pe.lrg)
}


hazard.PFS.mod <-harzard.plt.mod(time.vec = clinfinal$time,
                                 cens.vec =  clinfinal$status ,
                                 Outcome = "Death", lrg.haz.int = 1)

df.haz <- data.frame(time = hazard.PFS.mod$Cuts[-1]-1,
                     hazard = c(hazard.PFS.mod$Hazard))


#View(Collapsing_Model)
haz_plot_collapsing <- plot(Collapsing_Model, type = "hazard")
haz_df <- haz_plot_collapsing$data
haz_df_summary <- haz_df %>% group_by(timepoints) %>% dplyr::summarize(hazards.mean = mean(hazards),
                                                               hazards.975 = quantile(hazards, 0.975),
                                                               hazards.025 = quantile(hazards, 0.025))


path_latex <- "C:/Users/phili/OneDrive/PhD/Latex Folder/"
max_time <- 8
time_index <- 1
ggplot(bshazard.df[which(bshazard.df$time <= round(max_time,0)),], aes(x = time, y = hazard))+
  geom_line(aes(colour = "blue"), size = 1.3,linetype = "longdash")+
  geom_ribbon(aes(ymin = lower.ci, ymax = upper.ci, fill = "blue"), alpha = 0.2)+
  geom_step(data = df.haz[which(df.haz$time <= round(max_time,0)),],  aes(time, hazard, colour = "black"), size = 1.1,linetype = "dotdash")+
  xlab("Time (Years)")+
  ylab("Hazard")+
  #ggtitle(ggtitle_vec[i])+
  #geom_step(data = haz_df ,aes(timepoints, hazards,group = id), linetype = "dashed", alpha = 0.03, colour = "red")+
  geom_line(aes(timepoints, hazards.mean, colour = "purple"),data = haz_df_summary, size = 1.3)+
  ggplot2::geom_ribbon(aes(ymin = hazards.025, ymax = hazards.975, x = timepoints,fill = "purple"),data =haz_df_summary,
                       inherit.aes = F, alpha = 0.2)+
  #scale_y_continuous(breaks = seq(from = 0, to = 0.2, by =0.1))+
  scale_x_continuous(expand = c(0, 0), limits = c(0, round(max_time,0)), breaks= seq(0, round(max_time,0), by = time_index)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA), breaks = seq(from = 0, to = 1.7, by =0.1))+
  # coord_cartesian(ylim = c(0,.2),xlim = c(0,max_time))+
  scale_colour_manual(name = 'Hazard Functions',
                      values =c('black'='black','blue'='blue','purple'= 'purple'), labels = c('Interval hazard','bsharzard', 'PEM'))+
  scale_fill_identity(name = '95% Intervals', guide = 'legend',labels = c('bsharzard', "PEM")) +
  theme_light()
#Non-parametric Hazards
ggsave(paste0("Hazards_Gioblastoma.png"), width = 7, height = 5)




# 2. Stanford Example ----------------------------------------------------

## 2.1 Load data and fit collapsing model and standard Parametric models -----

stanford2 <- survival::stanford2
stanford2$time <- stanford2$time/365
surv.stanford <- survfit(Surv(time,status)~1,data = stanford2)

dist.res  <- flexsurvreg(Surv(time,status)~1,data = stanford2, dist = "gengamma")

# add method to grid.draw
grid.draw.ggsurvplot <- function(x){
  survminer:::print.ggsurvplot(x, newpage = FALSE)
}

cum_haz_stan <- survminer::ggsurvplot(surv.stanford, risk.table = TRUE, fun = "cumhaz",break.time.by =1,
                                      xlab = "Time (Years)")

ggsave(file = "cum_haz_stan.png", cum_haz_stan, width = 8, height = 6)

time_censor <- 2#365 #2years

stanford_test <- stanford2 %>% rename(time_event = time) %>%
                               mutate(time = ifelse(time_event <= time_censor, time_event, time_censor),
                                      status = ifelse(time_event <= time_censor, status, 0))

Collapsing_Model <- collapsing.model(stanford_test,
                                     n.iter = 20750,
                                     burn_in = 750,
                                     n.chains = 2,timescale = "years")



mod_comp <-compare.surv.mods(Collapsing_Model,
                             max_predict = 10,
                             plot.best =4,
                             chng.num = 2,
                             n.iter.jags = 10000,
                             n.thin.jags = 1,
                             n.burnin.jags = 1000)

stanford_plot<- PiecewiseChangepoint::compare_dco(mod_comp,old_dco = stanford_test, new_dco = stanford2,km_risk = NULL)+
  xlab("Time (Years)")

ggsave(filename ="Stanford_Survival.png",plot = stanford_plot, height = 5.59, width = 7)




stanford_boot <- PiecewiseChangepoint:::compare_boot_sims(mod_parametric_orig = mod_comp,
                                                             follow_up_data =stanford2)



print(xtable::xtable(stanford_boot, type = "latex"), include.rownames=FALSE)



# 3. Leukemia Example ----------------------------------------------------

library("openxlsx")
leukemia.df <- read.xlsx(paste0("G:\\My Drive\\PhD\\R codes\\Changepoint Analysis\\","Achar Leukemia Data.xlsx"),1)
leukemia.df <- leukemia.df[which(leukemia.df[,1] !=182),]


## 3.1 Fit Collapsing Model ------------



Collapsing_Model_leukemia <- collapsing.model(leukemia.df,
                                     n.iter = 20750,
                                     burn_in = 750,
                                     n.chains = 2,
                                     timescale = "days")

## 3.2 Fit Gibbs Samplers ------------

gibbs_1chng <- PiecewiseChangepoint:::gibbs.sampler(df = leukemia.df,
                                                    num.breaks = 1, # Number of Changepoints
                                                    n.iter = 5000,
                                                    burn_in = 100,
                                                    num.chains = 2,
                                                    n.thin = 1,
                                                    alpha.hyper = 1,  #Hyperparameter for lambda prior
                                                    beta.hyper = 365, # #Hyperparameter for lambda prior
                                                    MLE = FALSE)
log.lik.df_1chng <- get.loglik(leukemia.df, gibbs_1chng$lambda, gibbs_1chng$changepoint)
loo::waic(log.lik.df_1chng)

plt_surv1 <- PiecewiseChangepoint:::plot.changepoint(gibbs_1chng, max_predict = 2000, chng.num = 1)+xlab("Time (Days)")

ggsave(filename ="leukemia1_change.png",plot = plt_surv1, height = 5.59, width = 7)

plt_chng1 <- PiecewiseChangepoint:::plot.pos_changepoint(Collapsing_Model_leukemia,
                                                         breaks = seq(300, 700, by = 50))+xlab("Time (Days)")+
  theme_bw()
ggsave(filename ="leukemia1.png",plot = plt_chng1, height = 5.59, width = 7)


gibbs_2chng <- PiecewiseChangepoint:::gibbs.sampler(df = leukemia.df,
                          num.breaks = 2, # Number of Changepoints
                          n.iter = 5000,
                          burn_in = 100,
                          num.chains = 2,
                          n.thin = 1,
                          alpha.hyper = 1, #Hyperparameter for lambda prior
                          beta.hyper = 365, #Hyperparameter for lambda prior
                          MLE = FALSE)
  #If TRUE, considers on Uniform Prior
plt_surv2 <- PiecewiseChangepoint:::plot.changepoint(gibbs_2chng, max_predict = 2000)

ggarrange(plt_surv1+xlab("Time"),
          plt_surv2+xlab("Time")+ylab(""))
ggsave(paste0(path_latex,"Leukemia_both.png"),width=12,height =4)

log.lik.df_2chng <- get.loglik(leukemia.df, gibbs_2chng$lambda, gibbs_2chng$changepoint)
loo::waic(log.lik.df_2chng)

#Limitations of Bayesian Leave-One-Out Cross-Validation for Model Selection



# 4 Comparison of the Collapsing approach with Marginal Likelihood evaluation ------------

set.seed(123)
n_obs =100
n_events_req=100
max_time =  2

rate = c(0.75,0.25)
t_change =1

df <- leukemia.df



Collapsing_Model <- collapsing.model(df,
                                     n.iter = 50000,
                                     burn_in = 750,
                                     n.chains = 3,
                                     timescale = "days",
                                     max.num.breaks = 6)
#remove.packages("PiecewiseChangepoint")
#install.packages("combinat")
#install.packages("Brobdingnag")

library("Brobdingnag")
library("combinat")

beta.hyper <- 365 # If we set the value of beta to the posterior mean of beta from the collapsing model we get very similar values
time_diffs<- PiecewiseChangepoint:::df_recast(df)
#n.sims <- 100000
n.sims <- 10000
beta_vals <- 2 # Fix the value of beta (set to 2 so there is no issue with matrix to vector coercion)


marg_vec2 <- marg_vec <- c()
for(k in 1:4){

  k_change <- k
  n_events <- nrow(time_diffs)
  combs <- combn(1:n_events,k_change)
  probs <-apply(combs,2,function(x){PiecewiseChangepoint:::pos_prob(x, n_events, FALSE)})


  #PiecewiseChangepoint:::pos_prob(c(2,4,8), 10, FALSE)

  index.drp <- which(probs == 0)
  probs <- probs[-index.drp]
  combs <- combs[,-index.drp, drop = F]
  cum_prob <- cumsum(probs)
  max(cum_prob)
  unif_sims <- runif(n.sims)
  k_prior_loc <- sapply(unif_sims, function(x){min(which(cum_prob >= x))})


  # marg_vec <- c(marg_vec,log(mean(exp(apply(combs[,k_prior_loc,drop = F], 2,
  #                    function(x){PiecewiseChangepoint::margin_lik_calc_log(time_diffs,x,1,365)+
  #                        log(PiecewiseChangepoint:::pos_prob(x, n_events, FALSE))})))))

  # marg_vec2 <- c(marg_vec2,log(mean(exp(apply(combs[,k_prior_loc,drop = F], 2,
  #                                           function(x){PiecewiseChangepoint::margin_lik_calc_log(time_diffs,x,1,500)})))))



  beta_array <- array(dim = c(dim(combs)[1]+1, dim(combs)[2],beta_vals))

  for(i in 1:beta_vals){
    beta_array[,,i] <- rbind(combs, rep(beta.hyper,dim(combs)[2]))#rgamma(dim(combs)[2],1, 1/beta.hyper))

  }


  res <- matrix(nrow = n.sims, ncol = beta_vals)

  for(j in 1:beta_vals){

    res[,j] <-  apply(beta_array[,k_prior_loc,j,drop = F],2,
                      function(x){PiecewiseChangepoint:::margin_lik_calc_log(time_diffs,x[1:k],1,x[k+1])})

  }

  brob_res <- exp(as.brob(as.vector(res)))
  marg_vec2 <- c(marg_vec2, sum(brob_res)/length(brob_res))

}



#NAIVE ESTIMATION
# lambda_naive  <- rgamma(100000, 1, 500)
# log(mean((lambda_naive^sum(df$status))*exp(-lambda_naive*sum(df$time))))

res_no_chng <- c()


for(i in 1:beta_vals){
  res_no_chng <- c(res_no_chng, PiecewiseChangepoint:::margin_lik_calc_log(time_diffs,NA,1,beta.hyper))#rgamma(1, 1, 1/beta.hyper)))
}

brob_res_no_chng <- exp(as.brob(as.vector(res_no_chng)))
marg_no_chng <- sum(brob_res_no_chng)/length(brob_res_no_chng)
marg.like <- c(marg_no_chng,marg_vec2)

#exp(marg_vec2[1]-margin_lik_calc_log(time_diffs,NA,1,500))

log_marg <- unlist(lapply(marg.like,function(x){log(x)}))

options(scipen = 100)
log_marg2 <- log_marg+ dpois(0:(length(log_marg)-1), 1, log = T)
round(exp(diff(log_marg2)), digits = 4)

log_marg/2.303
log_marg2/2.303
round(exp(diff(log_marg)), digits = 4)
round(exp(log_marg2)/sum(exp(log_marg2)), digits = 4)


Collapsing_Model$prob.changepoint[2]/Collapsing_Model$prob.changepoint[1]
Collapsing_Model$prob.changepoint[3]/Collapsing_Model$prob.changepoint[2]
Collapsing_Model$prob.changepoint[4]/Collapsing_Model$prob.changepoint[3]


# We show that we can evaluate the marginal likelihood by brute force and obtain similar Bayes Factors although much quicker


# 5 Convergence assessment of Collapsing Models ------------




RJMCMC_conver_plt <- function(k,lambda, batch.size = 50, test.index){

  n.chains <- dim(k)[3]
  complete.batch <- floor(nrow(k[,,1])/batch.size)
  nrow.array <- complete.batch*batch.size
  lambda <- lambda[1:nrow.array,,]
  k <- k[1:nrow.array,,]
  chain.id <- rep(1:n.chains, each = nrow.array)
  batch.id <- rep(1:complete.batch, each = batch.size )

  k.stacked.converge <- k[,,1]
  lambda.stacked.converge <- lambda[,,1]
  #changepoint.stacked.converge <- changepoint[,,1]

  for(i in 2:n.chains){
    k.stacked.converge  <-rbind(k.stacked.converge,k[,,i])
    lambda.stacked.converge  <-rbind(lambda.stacked.converge,lambda[,,i])
    #changepoint.stacked.converge <-rbind(changepoint.stacked.converge,changepoint[,,i])
  }

  converge.array <-   array(NA,dim = c(nrow.array*n.chains,5))
  converge.array[,1] <- apply(k.stacked.converge,1,
                              FUN= index.loc,index = test.index)

  for(i in 1:nrow(converge.array)){
    converge.array[i,2] <- lambda.stacked.converge[i,converge.array[i,1]]
  }
  num.changepoints<- apply(k.stacked.converge,1,function(x){length(na.omit(x))})
  converge.array[,3] <- num.changepoints
  converge.array[,4] <- batch.id
  converge.array[,5] <- chain.id
  converge.array <- data.frame(converge.array)
  colnames(converge.array) <- c("Location","Hazard", "NumChangepoint", "Batch","Chain")


  PSRF1_1 <- rep(NA,complete.batch)
  PSRF1_2 <- rep(NA,complete.batch)

  PSRF2_1 <- rep(NA,complete.batch)
  PSRF2_2 <- rep(NA,complete.batch)

  MPSRF1 <- rep(NA,complete.batch)
  MPSRF2  <- rep(NA,complete.batch)
  eigen1 <- rep(NA,complete.batch)
  eigen2 <- rep(NA,complete.batch)
  eigen3 <- rep(NA,complete.batch)
  eigen4 <- rep(NA,complete.batch)


  for(i in 1:complete.batch){

    temp.converge.array <- converge.array %>% dplyr::filter(Batch %in% c(1:i))
    T.val <- batch.size*i
    M.val <- length(unique(temp.converge.array$NumChangepoint))

    eq.11 <- temp.converge.array %>% group_by(Chain,NumChangepoint) %>%
      dplyr::summarise_all(mean,na.rm =T) %>% dplyr::select(-Batch)
    names(eq.11) <- c("Chain","NumChangepoint","Location_avg", "Hazard_avg")

    eq.12 <- temp.converge.array %>% group_by(Chain) %>% dplyr::summarise_all(mean,na.rm =T)
    eq.12 <- eq.12[,c(1:3)]
    names(eq.12) <- c("Chain","Location_avg", "Hazard_avg")

    eq.13 <- temp.converge.array %>% group_by(NumChangepoint) %>%
      dplyr::summarise_all(mean,na.rm =T)
    eq.13 <- eq.13[,c(1:3)]
    names(eq.13) <- c("NumChangepoint","Location_avg", "Hazard_avg")

    eq.14  <- colMeans(temp.converge.array) #Equation 14 in Castelloe et al
    eq.14 <- eq.14[c(1,2)]

    #Multivariate & univariate
    eq.37 <- var(temp.converge.array[,c(1,2)])
    eq.15 <- diag(var(temp.converge.array[,c(1,2)]))

    eq.38 <- temp.converge.array[,-c(3,4)] %>% group_by(Chain) %>% dplyr::left_join(eq.12)%>%
      mutate(diff_haz = Hazard-Hazard_avg,
             diff_loc = Location-Location_avg)

    eq.16 <- colSums((eq.38[,c("diff_loc","diff_haz")])^2)/(n.chains*(T.val -1))

    eq.38 <- matrix(rowSums(apply(eq.38[,c("diff_loc","diff_haz")],1,FUN = function(x){x%*%t(x)}))/
                      (n.chains*(T.val -1)),2,2)

    eq.39 <- temp.converge.array[,-c(4,5)]  %>% dplyr::left_join(eq.13, by = "NumChangepoint") %>%
      mutate(diff_haz = Hazard-Hazard_avg,
             diff_loc = Location-Location_avg)

    eq.17 <- colSums((eq.39[,c("diff_loc","diff_haz")]^2))/(n.chains*T.val -M.val)

    eq.39 <- matrix(rowSums(apply(eq.39[,c("diff_loc","diff_haz")],1,
                                  FUN = function(x){x%*%t(x)}))/(n.chains*T.val -M.val),2,2)

    eq.40 <- temp.converge.array[,-c(4)]  %>% dplyr::left_join(eq.11, by = c("Chain","NumChangepoint")) %>%
      mutate(diff_haz = Hazard-Hazard_avg,
             diff_loc = Location-Location_avg)

    eq.18 <- colSums((eq.40[,c("diff_loc","diff_haz")]^2))/(n.chains*(T.val -M.val))

    eq.40 <- matrix(rowSums(apply(eq.40[,c("diff_loc","diff_haz")],1,FUN = function(x){x%*%t(x)}))/(n.chains*(T.val -M.val)),2,2)

    PSRF1_1[i] <- eq.15[1]/eq.16[1]
    PSRF1_2[i] <- eq.15[2]/eq.16[2]

    PSRF2_1[i] <- eq.17[1]/eq.18[1]
    PSRF2_2[i] <- eq.17[2]/eq.18[2]

    MPSRF1[i] <- max(eigen(solve(eq.38)%*%eq.37)$values)
    MPSRF2[i]  <- max(eigen(solve(eq.40)%*%eq.39)$values)
    eigen1[i] <- max(eigen(eq.37)$values)
    eigen2[i] <- max(eigen(eq.38)$values)
    eigen3[i] <- max(eigen(eq.39)$values)
    eigen4[i] <- max(eigen(eq.40)$values)

  }

  plot(MPSRF1, typ = "l")
  lines(PSRF1_1, col = 2)
  lines(PSRF1_2, col = 3)

  plot(MPSRF2, typ = "l")
  lines(PSRF2_1, col = 2)
  lines(PSRF2_2, col = 3)

  plot(eigen1, typ = "l")
  lines(eigen2, col = 2)

  plot(eigen3, typ = "l")
  lines(eigen4, col = 2)

}

RJMCMC_conver_plt2 <- function(k, batch.size = 50){

  n.chains <- dim(k)[3]
  complete.batch <- floor(nrow(k[,,1])/batch.size)

  nrow.array <- complete.batch*batch.size
  k <- k[1:nrow.array,,]
  chain.id <- rep(1:n.chains, each = nrow.array)
  batch.id <- rep(1:complete.batch, each = batch.size )

  k.stacked.converge <- k[,,1]

  for(i in 2:n.chains){
    k.stacked.converge  <-rbind(k.stacked.converge,k[,,i])
    #changepoint.stacked.converge <-rbind(changepoint.stacked.converge,changepoint[,,i])
  }

  num.changepoints <-unlist(apply(k.stacked.converge,1, function(x){length(na.omit(x))}))

  converge.array <- array(NA,dim = c(nrow.array*n.chains,3))
  converge.array[,1] <- num.changepoints
  converge.array[,2] <- batch.id
  converge.array[,3] <- chain.id
  converge.array <- data.frame(converge.array)
  colnames(converge.array) <- c( "NumChangepoint", "Batch","Chain")
  PSRF1_1 <- rep(NA,complete.batch)


  for(i in 1:complete.batch){

    temp.converge.array <- converge.array %>% dplyr::filter(Batch %in% c(1:i))
    T.val <- batch.size*i
    M.val <- unique(temp.converge.array$NumChangepoint)

    eq.11 <- temp.converge.array %>% group_by(Chain,NumChangepoint) %>%
      dplyr::summarise_all(mean,na.rm =T) %>% dplyr::select(-Batch)
    names(eq.11) <- c("Chain","NumChangepoint")

    eq.15 <- var(temp.converge.array[,1])

    eq.16 <- temp.converge.array%>% group_by(Chain) %>% dplyr::summarize(NumChangepoint_avg = mean(NumChangepoint))
    eq.16 <- temp.converge.array %>% dplyr::left_join(eq.16, by = "Chain") %>% mutate(diff_NumChange = (NumChangepoint-NumChangepoint_avg)^2)
    eq.16 <- sum(eq.16[,"diff_NumChange"])/(n.chains*(T.val -1))

    PSRF1_1[i] <- eq.15/eq.16


  }

  tiff("PSRF1_1.png",  units="in", width=6, height=4,res=80)

  plot(PSRF1_1, type = "l", ylab = "PSRF", xlab = "Batch Number")

  dev.off()

}



k<- Collapsing_Model[["k"]]
lambda <- Collapsing_Model[["lambda"]]

#have to unstack the lambda

seq(from = 1, to = nrow(lambda), by = dim(k)[1] )

upper <- dim(k)[1]*(1:dim(k)[3])
lower <- head(c(1, 1+ upper), n= -1)
dim_lamb <- dim(k)
dim_lamb[2] <- dim_lamb[2]+1
lambda_array <- array(NA, dim = dim_lamb)

for(i in 1:dim(k)[3]){
  lambda_array[,,i] <- lambda[lower[i]:upper[i],]

}

batch.size <- 5000
test.index <- Mode(k[,1,1])

undebug(RJMCMC_conver_plt)
#Have to comment back in the code to define a lambda_array, for some reason this only works with a weibull model.
RJMCMC_conver_plt(k,lambda = lambda_array, batch.size = batch.size, test.index = test.index)
RJMCMC_conver_plt2(k, batch.size = batch.size)



# 6 NICE plot of Changepoint Log-Likelihood  ------------


library("purrr")
library("pch")
library("plotly")
# Dataset will be in PHD thesis in Important R files
url.path <- "C:\\Users\\phili\\OneDrive\\PhD\\Datasets\\e1690.missing.dat"

E1690.dat <- read.delim(url.path, header = T, sep="", skip=12, as.is=TRUE)
#Drop PFS events with time equal zero
E1690.dat <- E1690.dat[-which(E1690.dat$FAILTIME ==0),]

#Convert to the correct notation for survival objects
E1690.dat[which(E1690.dat$FAILCENS == 1),"FAILCENS"] <-0
E1690.dat[which(E1690.dat$FAILCENS == 2),"FAILCENS"] <-1
E1690.dat[which(E1690.dat$SURVCENS == 1),"SURVCENS"] <-0
E1690.dat[which(E1690.dat$SURVCENS == 2),"SURVCENS"] <-1

#install.packages("plotly")
#install.packages("pch")

library("plotly")
library("pch")


grid.search.piecewise.pchreg <- function(min.break, max.break, grid.width,
                                         num.breaks, min.break.width, time, status){

  seq.new <- seq(min.break, max.break, by = grid.width)
  combn.df <- t(combn(seq.new, num.breaks))
  combn.df <- data.frame(cbind(0,combn.df,max(time)))
  combn.df.diff <- t(apply(combn.df, 1, diff))
  drop.index <- which(apply(combn.df.diff, 1, function(x)(any(x < min.break.width))) == TRUE)

  if(length(drop.index) !=0){
    combn.df <- data.frame(combn.df[-drop.index,])
  }

  print(paste0("Number of evaluations = ", nrow(combn.df)))

  Surv.formula <- as.formula("Surv(time, status) ~ 1")
  #https://www.r-bloggers.com/skip-errors-in-r-loops-by-not-writing-loops/
  possibly_some_function = possibly(function(x)pchreg(Surv.formula,breaks = x)$logLik,
                                    otherwise = NA)
  combn.df$Likelihood <- apply(combn.df, 1, possibly_some_function)

  #combn.df$Likelihood <- apply(combn.df, 1, function(x)pchreg(Surv.formula,breaks = x)$logLik)

  #drop.index2 <- which(combn.df$Likelihood == -1.000000e+20)

  #if(length(drop.index2) !=0){
  #combn.df <- combn.df[-drop.index2,]
  #}
  combn.df[which.max(combn.df$Likelihood),]
  if(num.breaks == 2){

    #https://www.r-bloggers.com/creating-a-matrix-from-a-long-data-frame/

    combn.df$X2_mod <- match(combn.df$X2, sort(unique(combn.df$X2)))
    combn.df$X3_mod <- match(combn.df$X3, sort(unique(combn.df$X3)))

    n1 <- length(unique(combn.df$X2))
    n2 <- length(unique(combn.df$X3))

    matrix.triangle  <- with(combn.df, {
      M <- matrix(nrow=n1, ncol=n2)
      M[cbind(X2_mod, X3_mod)] <- Likelihood
      M
    })



    #kd <- with(MASS::geyser, MASS::kde2d(duration, waiting, n = 50))
    #persp(x =combn.df$X1_mod,  y =combn.df$X2_mod, z = M)
    output <- list()

    output[[1]] <- plotly::plot_ly(x = sort(unique(combn.df$X3)), y = sort(unique(combn.df$X2)), z = matrix.triangle) %>% add_surface() %>%
      layout(scene = list(xaxis=list(title = "2nd Change-point"),
                          yaxis=list(title = "1st Change-point"),
                          zaxis = list(title = "Log-Likelihood")))

    combn.df <- subset(combn.df, select = -c(X2_mod,X3_mod))
    output[[2]] <-  combn.df

    return(output)

  }

  return(combn.df)

}



trt.df <- E1690.dat[which( E1690.dat$TRT == 2 & E1690.dat$STUDY == "1684"),]


result <- grid.search.piecewise.pchreg(min.break = 0.5,
                                       max.break = 7,
                                       grid.width = 0.25,
                                       num.breaks = 2,
                                       min.break.width = 1,
                                       time = trt.df$FAILTIME,
                                       status = trt.df$FAILCENS)
htmlwidgets::saveWidget(result[[1]], #the graph to export
                        file = "graph 1.html")


plotly::export(p = result[[1]], #the graph to export
               file = "graph 1.png") #the name and type of file (can be .png, .jpeg, etc.)

## Add in the simulation studies


library(ggplot2)
library(ggpubr)
library(stringr)

loadRData <- function(fileName){
  #loads an RData file, and returns it
  load(fileName)
  get(ls()[ls() != "fileName"])
}
sim.path <- "C:\\Users\\phili\\OneDrive\\PhD\\R codes\\Changepoint Analysis\\Simulation Study\\Simulation Collapsing\\One Changepoint\\"
model.names <- c("decreasing_haz_lrg_","decreasing_haz_sml_","increasing_haz_lrg_","increasing_haz_sml_")
model.size <- c("200","500","1000")

#sim.path <- "C:/Users/phili/OneDrive/PhD/Simulation Study/Simulation Collapsing/"
#model.names <- c("bathtub_haz","decreasing_haz","increasing_haz","invertbath_haz")
#model.size <- c("300","500","1000")

model.com <- rep(NA, length(model.names)*length(model.size))
cnt <- 0
for(i in 1:length(model.names)){
  for(j in 1:length(model.size)){
    cnt <- cnt +1
    model.com[cnt] <- paste0(model.names[i],model.size[j])

  }
}
library(stringr)

n <- 1000

df.prob <- data.frame(probability= rep(NA,12),
                      model= rep(NA,12),
                      n =  rep(NA,12))

dict.names <- cbind(c("Increasing Small", "Increasing Large", "Decreasing Small", "Decreasing Large"),
                    c("increasing_haz_sml_","increasing_haz_lrg_", "decreasing_haz_sml_","decreasing_haz_lrg_"))

for(i in 1:length(model.com)){
  model.name <- model.com[i]
  sim.size <- as.numeric(gsub("[^0-9.]", "",  model.name))
  assign(model.name,loadRData(paste0(sim.path,model.com[i],".RData")))
  assign("current.model",loadRData(paste0(sim.path,model.com[i],".RData")))
  txt.string <- str_remove(model.name,paste0(sim.size))#str_remove(model.name,paste0(sim.size))#

  df.prob[i,1] <- current.model$prob.correct*100 #
  df.prob[i,2] <- dict.names[which(dict.names[,2]==txt.string),1]
  df.prob[i,3] <- sim.size

  for(j in 1:1){
    shape <- ((current.model$haz[j,1])^2)/(current.model$haz[j,2]^2)
    rate <- (current.model$haz[j,1])/(current.model$haz[j,2]^2)
    assign(paste0("df.res",j),data.frame(hazard = rgamma(n, shape = shape, rate = rate),
                                         changepoint = rnorm(n, current.model$chng[j,1], current.model$chng[j,2]),
                                         n = as.factor(sim.size), interval= as.factor(j), model = dict.names[which(dict.names[,2]==txt.string),1]))



  }
  shape <- ((current.model$haz[2,1])^2)/(current.model$haz[2,2]^2)
  rate <- (current.model$haz[2,1])/(current.model$haz[2,2]^2)

  df.res2 <-data.frame(hazard = rgamma(n, shape = shape, rate = rate),
                       changepoint = NA,
                       n = as.factor(sim.size), interval = as.factor(3),
                       model =  dict.names[which(dict.names[,2]==txt.string),1])


  assign(paste0("df.res_all",txt.string,sim.size),rbind(df.res1,df.res2))


  rm(df.res1,df.res2,txt.string)

}

for(i in 1:nrow(dict.names)){

  assign(paste0("df.res_final",dict.names[i,2]),rbind(get(paste0("df.res_all",dict.names[i,2],200)),
                                                      get(paste0("df.res_all",dict.names[i,2],500)),
                                                      get(paste0("df.res_all",dict.names[i,2],1000))))

}

df.prob$n <- as.factor(df.prob$n)
#df.prob$model <- as.factor(df.prob$model)

df.prob$model <- factor(df.prob$model, levels = c("Increasing Small", "Increasing Large", "Decreasing Small", "Decreasing Large"))
prob.plot <- ggplot(df.prob, aes(fill=n, y=probability, x=model)) +
  geom_bar(position="dodge", stat="identity")+
  scale_y_continuous(breaks = seq(0,100,10))+
  xlab("")+
  ylab("Probability")+
  theme_bw()+
  theme(legend.position = "none", plot.margin = unit(c(1.5,0.5,0.5,0.5), "lines"))
true.haz <- data.frame(model = dict.names[,2],
                       interval1  = c(0.25,0.75,0.75,0.2),
                       interval2  = c(0.5,0.5,0.2,0.75),
                       interval3  = c(0.75,0.25,0.75,0.2))


for(i in 1:nrow(dict.names)){

  df.plt.temp <- get(paste0("df.res_final",dict.names[i,2]))
  df.plt.temp$interval <- as.factor(df.plt.temp$interval)
  df.plt.temp$model <- as.factor(df.plt.temp$model)
  df.plt.temp$n <- as.factor(df.plt.temp$n)

  #index <- which(dict.names[i,2]==true.haz[,1])
  #print(index)
  gg.plt.temp <-  ggplot(df.plt.temp, aes(x=interval, y=hazard, fill=n)) +
    geom_boxplot(position=position_dodge(1), outlier.shape = NA)+
    #geom_segment(aes(x=2/3,xend=4/3,y=true.haz[index,1+1],yend=true.haz[index,1+1]), colour = "black", linetype = 1, size = 1, show.legend   = FALSE)+
    #geom_segment(aes(x=5/3,xend=7/3,y=true.haz[index,2+1],yend=true.haz[index,2+1]), colour = "orange", linetype = 1, size = 1, show.legend   = FALSE) +
    #geom_segment(aes(x=8/3,xend=10/3,y=true.haz[index,3+1],yend=true.haz[index,3+1]), colour = "purple", linetype = 1, size = 1, show.legend   = FALSE)+
    xlab("")+
    scale_y_continuous(breaks = seq(0,1.2,.2))+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "lines"),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          panel.background = element_rect(fill = "lightblue",
                                          colour = "lightblue",
                                          size = 0.5, linetype = "solid"))+
    coord_cartesian(ylim = c(0,1.2),expand = FALSE)
  #print(c(true.haz[index,1+1],true.haz[index,2+1],true.haz[index,3+1]))

  gg.plt.temp
  assign(paste0("p_hazard_",dict.names[i,2]),gg.plt.temp)
  rm(gg.plt.temp)

  gg.plt.temp2  <- ggplot(df.plt.temp[!is.na(df.plt.temp$changepoint),], aes(x=interval, y=changepoint, fill=n)) +
    geom_boxplot(position=position_dodge(1), outlier.shape = NA)+
    #geom_segment(aes(x=2/3,xend=4/3,y=0.5,yend=0.5), colour = "black", linetype = 1, size = 1, show.legend  = FALSE)+
    #geom_segment(aes(x=5/3,xend=7/3,y=1,yend=1), colour = "orange", linetype = 1, size = 1, show.legend   = FALSE)+
    xlab("")+
    ylab("")+
    scale_y_continuous(breaks = seq(.2,1.4,.2))+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "lines"),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank())+
    coord_cartesian(ylim = c(.2,1.4),expand = FALSE)

  assign(paste0("p_change_",dict.names[i,2]),gg.plt.temp2)
  rm(gg.plt.temp2)

}

plt.output <- ggarrange(
  ggarrange(p_hazard_increasing_haz_sml_ +ylab("Increasing Small"),
            p_change_increasing_haz_sml_,
            p_hazard_increasing_haz_lrg_ +ylab("Increasing Large"),
            p_change_increasing_haz_lrg_,
            p_hazard_decreasing_haz_sml_ +ylab("Decreasing Small"),
            p_change_decreasing_haz_sml_,
            p_hazard_decreasing_haz_lrg_ +ylab("Decreasing Large"),
            p_change_decreasing_haz_lrg_,
            nrow = 4, ncol = 2, common.legend = TRUE, legend = "bottom"), prob.plot+theme(axis.text=element_text(size=8)))
#Could annotate figure at bottom

ggsave(annotate_figure(plt.output, fig.lab.pos = c("top.left"),fig.lab.face= "bold",fig.lab =  c("Hazard across intervals         Changepoints                           Probability of selecting correct model")) ,
       filename = "Collapsing Model One Changepoint.png", width = 8.5, height =6)



# 7 Plot of Model results from simulation study ------------
#C:\Users\phili\OneDrive\PhD\R codes\Changepoint Analysis\Simulation Study


library(ggplot2)
library(ggpubr)
library(stringr)

## Need to update for  Chapell analysis
## Include mean difference from true survival function as per Chapell

sim.study <- function(n_obs,n_events_req,rate,t_change, max_time =2,
                      n.sims, sims = 10750,burn_in = 750){

  if(is.na(t_change)){
    num.breaks = 0
  }else{
    num.breaks <- length(t_change)

  }


  n.events <- rep(NA, n.sims)
  avg.Haz <- array(NA, dim = c(n.sims, 7))
  avg.change <- array(NA, dim = c(n.sims, 6))

  for(i in 1:n.sims){
    print(paste0("Sim ",i," of ",n.sims))
    Sys.sleep(1 / n.sims)

    n_events <- 0

    while(n_events  != req_obs){
      df <- gen_piece_df(n_obs = n_obs,n_events_req = n_events_req,
                         num.breaks = num.breaks,rate = rate ,
                         t_change = t_change, max_time = max_time)
      n_events <-  df$status
    }

    n.events[i] <- sum(df$status == 1)
    #tryCatch({
    mod <- RJMC.piece.compact_rcpp(sims, df,n.chains = 1, burn_in = burn_in )
    avg.Haz[i,1:length(mod[[1]])] <- mod[[1]]
    avg.change[i,1:length(mod[[2]])] <- mod[[2]]
    #}, error = function(e){print("Nuts")})
  }

  chang.vals <-apply(avg.change,1, function(x){length(na.omit(x))})
  uniq.chang <- unique(chang.vals)[order(unique(chang.vals))]
  final.chng <- data.frame(mean = rep(NA,length(uniq.chang)),sd = rep(NA,length(uniq.chang)), nsims = rep(NA,length(uniq.chang)))
  for(i in 1:length(uniq.chang)){
    final.chng[i,1]  <- mean(avg.change[which(chang.vals == uniq.chang[i]),uniq.chang[i]], na.rm =T)
    final.chng[i,2] <- sd(avg.change[which(chang.vals == uniq.chang[i]),uniq.chang[i]], na.rm =T)
    final.chng[i,3] <- table(chang.vals)[i]
  }

  row.names(final.chng) <- uniq.chang
  prob.correct <- mean(chang.vals == num.breaks)

  if(length(which(chang.vals == num.breaks)) >1){

    if(num.breaks == 0 ){
      se.chng <- mean.chng <- NA
      mean.haz <- mean(avg.Haz[which(chang.vals == num.breaks),
                               1:(num.breaks+1)], na.rm = T)
      se.haz <- sd(avg.Haz[which(chang.vals == num.breaks),
                           1:(num.breaks+1)], na.rm = T)
    }else if(num.breaks == 1 ){

      mean.chng <- mean(avg.change[which(chang.vals == num.breaks),
                                   1:num.breaks], na.rm =T)
      se.chng <- sd(avg.change[which(chang.vals == num.breaks),1:num.breaks],na.rm = T)

      mean.haz <- colMeans(avg.Haz[which(chang.vals == num.breaks),
                                   1:(num.breaks+1)], na.rm = T)

      se.haz <- apply(avg.Haz[which(chang.vals == num.breaks),
                              1:(num.breaks+1)],2,FUN = sd, na.rm = T)


    }else{

      mean.chng <- colMeans(avg.change[which(chang.vals == num.breaks),
                                       1:num.breaks], na.rm =T)
      se.chng <- apply(avg.change[which(chang.vals == num.breaks),1:num.breaks]
                       ,2,FUN= sd,na.rm = T)

      mean.haz <- colMeans(avg.Haz[which(chang.vals == num.breaks),
                                   1:(num.breaks+1)], na.rm = T)

      se.haz <- apply(avg.Haz[which(chang.vals == num.breaks),
                              1:(num.breaks+1)],2,FUN = sd, na.rm = T)
    }

  }else{

    mean.chng<- NA
    se.chng <- NA
    mean.haz <- NA
    se.haz <- NA

  }

  return(list(final.chng = final.chng,prob.correct = prob.correct,
              chng = data.frame(mean.chng,se.chng),
              haz = data.frame(mean.haz,se.haz),
              prob.mod = chang.vals))
}


loadRData <- function(fileName){
  #loads an RData file, and returns it
  load(fileName)
  get(ls()[ls() != "fileName"])
}

#Now in G-Drive
sim.path <- "G:\\My Drive\\PhD\\R codes\\Changepoint Analysis\\Simulation Study\\Simulation Collapsing\\One Changepoint\\"
model.names <- c("decreasing_haz_lrg_","decreasing_haz_sml_","increasing_haz_lrg_","increasing_haz_sml_")
model.size <- c("200","500","1000")

#sim.path <- "C:/Users/phili/OneDrive/PhD/Simulation Study/Simulation Collapsing/"
#model.names <- c("bathtub_haz","decreasing_haz","increasing_haz","invertbath_haz")
#model.size <- c("300","500","1000")

model.com <- rep(NA, length(model.names)*length(model.size))
cnt <- 0
for(i in 1:length(model.names)){
  for(j in 1:length(model.size)){
    cnt <- cnt +1
    model.com[cnt] <- paste0(model.names[i],model.size[j])

  }
}
library(stringr)

n <- 1000

df.prob <- data.frame(probability= rep(NA,12),
                      model= rep(NA,12),
                      n =  rep(NA,12))

dict.names <- cbind(c("Increasing Small", "Increasing Large", "Decreasing Small", "Decreasing Large"),
                    c("increasing_haz_sml_","increasing_haz_lrg_", "decreasing_haz_sml_","decreasing_haz_lrg_"))

for(i in 1:length(model.com)){
  model.name <- model.com[i]
  sim.size <- as.numeric(gsub("[^0-9.]", "",  model.name))
  assign(model.name,loadRData(paste0(sim.path,model.com[i],".RData")))
  assign("current.model",loadRData(paste0(sim.path,model.com[i],".RData")))
  txt.string <- str_remove(model.name,paste0(sim.size))#str_remove(model.name,paste0(sim.size))#

  df.prob[i,1] <- current.model$prob.correct*100 #
  df.prob[i,2] <- dict.names[which(dict.names[,2]==txt.string),1]
  df.prob[i,3] <- sim.size

  for(j in 1:1){
    shape <- ((current.model$haz[j,1])^2)/(current.model$haz[j,2]^2)
    rate <- (current.model$haz[j,1])/(current.model$haz[j,2]^2)
    assign(paste0("df.res",j),data.frame(hazard = rgamma(n, shape = shape, rate = rate),
                                         changepoint = rnorm(n, current.model$chng[j,1], current.model$chng[j,2]),
                                         n = as.factor(sim.size), interval= as.factor(j), model = dict.names[which(dict.names[,2]==txt.string),1]))



  }
  shape <- ((current.model$haz[2,1])^2)/(current.model$haz[2,2]^2)
  rate <- (current.model$haz[2,1])/(current.model$haz[2,2]^2)

  df.res2 <-data.frame(hazard = rgamma(n, shape = shape, rate = rate),
                       changepoint = NA,
                       n = as.factor(sim.size), interval = as.factor(3),
                       model =  dict.names[which(dict.names[,2]==txt.string),1])


  assign(paste0("df.res_all",txt.string,sim.size),rbind(df.res1,df.res2))


  rm(df.res1,df.res2,txt.string)

}

for(i in 1:nrow(dict.names)){

  assign(paste0("df.res_final",dict.names[i,2]),rbind(get(paste0("df.res_all",dict.names[i,2],200)),
                                                      get(paste0("df.res_all",dict.names[i,2],500)),
                                                      get(paste0("df.res_all",dict.names[i,2],1000))))

}

df.prob$n <- as.factor(df.prob$n)
#df.prob$model <- as.factor(df.prob$model)

df.prob$model <- factor(df.prob$model, levels = c("Increasing Small", "Increasing Large", "Decreasing Small", "Decreasing Large"))
prob.plot <- ggplot(df.prob, aes(fill=n, y=probability, x=model)) +
  geom_bar(position="dodge", stat="identity")+
  scale_y_continuous(breaks = seq(0,100,10))+
  xlab("")+
  ylab("Probability")+
  theme_bw()+
  theme(legend.position = "none", plot.margin = unit(c(1.5,0.5,0.5,0.5), "lines"))
true.haz <- data.frame(model = dict.names[,2],
                       interval1  = c(0.25,0.75,0.75,0.2),
                       interval2  = c(0.5,0.5,0.2,0.75),
                       interval3  = c(0.75,0.25,0.75,0.2))


for(i in 1:nrow(dict.names)){

  df.plt.temp <- get(paste0("df.res_final",dict.names[i,2]))
  df.plt.temp$interval <- as.factor(df.plt.temp$interval)
  df.plt.temp$model <- as.factor(df.plt.temp$model)
  df.plt.temp$n <- as.factor(df.plt.temp$n)

  #index <- which(dict.names[i,2]==true.haz[,1])
  #print(index)
  gg.plt.temp <-  ggplot(df.plt.temp, aes(x=interval, y=hazard, fill=n)) +
    geom_boxplot(position=position_dodge(1), outlier.shape = NA)+
    #geom_segment(aes(x=2/3,xend=4/3,y=true.haz[index,1+1],yend=true.haz[index,1+1]), colour = "black", linetype = 1, size = 1, show.legend   = FALSE)+
    #geom_segment(aes(x=5/3,xend=7/3,y=true.haz[index,2+1],yend=true.haz[index,2+1]), colour = "orange", linetype = 1, size = 1, show.legend   = FALSE) +
    #geom_segment(aes(x=8/3,xend=10/3,y=true.haz[index,3+1],yend=true.haz[index,3+1]), colour = "purple", linetype = 1, size = 1, show.legend   = FALSE)+
    xlab("")+
    scale_y_continuous(breaks = seq(0,1.2,.2))+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "lines"),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          panel.background = element_rect(fill = "lightblue",
                                          colour = "lightblue",
                                          size = 0.5, linetype = "solid"))+
    coord_cartesian(ylim = c(0,1.2),expand = FALSE)
  #print(c(true.haz[index,1+1],true.haz[index,2+1],true.haz[index,3+1]))

  gg.plt.temp
  assign(paste0("p_hazard_",dict.names[i,2]),gg.plt.temp)
  rm(gg.plt.temp)

  gg.plt.temp2  <- ggplot(df.plt.temp[!is.na(df.plt.temp$changepoint),], aes(x=interval, y=changepoint, fill=n)) +
    geom_boxplot(position=position_dodge(1), outlier.shape = NA)+
    #geom_segment(aes(x=2/3,xend=4/3,y=0.5,yend=0.5), colour = "black", linetype = 1, size = 1, show.legend  = FALSE)+
    #geom_segment(aes(x=5/3,xend=7/3,y=1,yend=1), colour = "orange", linetype = 1, size = 1, show.legend   = FALSE)+
    xlab("")+
    ylab("")+
    scale_y_continuous(breaks = seq(.2,1.4,.2))+
    theme(plot.margin = unit(c(0.5,0.5,0.5,0.5), "lines"),
          axis.title.x = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks.x = element_blank())+
    coord_cartesian(ylim = c(.2,1.4),expand = FALSE)

  assign(paste0("p_change_",dict.names[i,2]),gg.plt.temp2)
  rm(gg.plt.temp2)

}

plt.output <- ggarrange(
  ggarrange(p_hazard_increasing_haz_sml_ +ylab("Increasing Small"),
            p_change_increasing_haz_sml_,
            p_hazard_increasing_haz_lrg_ +ylab("Increasing Large"),
            p_change_increasing_haz_lrg_,
            p_hazard_decreasing_haz_sml_ +ylab("Decreasing Small"),
            p_change_decreasing_haz_sml_,
            p_hazard_decreasing_haz_lrg_ +ylab("Decreasing Large"),
            p_change_decreasing_haz_lrg_,
            nrow = 4, ncol = 2, common.legend = TRUE, legend = "bottom"), prob.plot+theme(axis.text=element_text(size=8)))
#Could annotate figure at bottom

ggsave(annotate_figure(plt.output, fig.lab.pos = c("top.left"),fig.lab.face= "bold",fig.lab =  c("Hazard across intervals         Changepoints                           Probability of selecting correct model")) ,
       filename = "Collapsing Model One Changepoint.png", width = 8.5, height =6)



# 8. Read in Simulation Study Results for Publication -----


#Simulation
path <-"C:\\Users\\phili\\OneDrive\\PhD\\R codes\\Changepoint Analysis\\Biometrics publication\\Simulation\\"


rate1 <- c(0.5,0.75)
rate2 <- c(0.25,0.75)
rate3 <- c(0.75,0.5)
rate4 <- c(0.75,0.25)

rate5 <- c(0.25,0.5,0.75)
rate6 <- c(0.75,0.5,0.25)
rate7 <- c(0.75,0.2,0.75)
rate8 <- c(0.2,0.75,0.2)

time1 <- c(0.5,1)

censor_vec <- c(0,0.33)
sample_vec <- c(300,500,1000)
rate_list <- list()

for(i in 1:8){
  current_rate <-get(paste0("rate",i))
  rate_list[[i]] <- current_rate
}

res_vec <- array(NA, dim = c(length(censor_vec)*length(sample_vec)*length(rate_list),2))
res_vec <- array(NA, dim = c(1*length(sample_vec)*length(rate_list),7))
res_vec <- data.frame(res_vec)
f <- 1
for(i in 1:length(rate_list)){
  for(j in 1:length(sample_vec)){
    for(q in 1:1 ){
      #for(q in 1:length(censor_vec) ){
      current_name <- paste0("Gibbs_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
      res_vec[f,1] <- current_name
      res <- readRDS(file = paste0(path,"Gibbs Simulations 2021\\",current_name,".rds"))
      res_vec[f,2]<- as.numeric(res$prob.correct)
      res_vec[f,3] <- as.numeric(res$chng[1,1])
      res_vec[f,4] <- as.numeric(res$chng[1,2])
      if(nrow(res$chng)>1){
        res_vec[f,5] <- as.numeric(res$chng[2,1])
        res_vec[f,6] <- as.numeric(res$chng[2,2])
      }
      res_vec[f,7] <- i
      f <- f +1
    }
  }
}

library(dplyr)
colnames(res_vec) <- c("Name","Prob","Tau1","SE1","Tau2","SE2","Model")


res_vec <- as_tibble(res_vec) %>% mutate_if(is.numeric,round, digits = 2)
res_vec$Prob <- res_vec$Prob*100
res_vec$Comb1 <- paste0(res_vec$Tau1," (",res_vec$SE1,")")
result <- aggregate(Comb1 ~ Model, data = res_vec, paste, collapse = " & ")
result$Comb1 <- paste0(result$Comb1," & 0.5 ")
res_vec$Comb2 <- paste0(res_vec$Tau2," (",res_vec$SE2,")")
result2 <- aggregate(Comb2 ~ Model, data = res_vec, paste, collapse = " & ")
result2$Comb2 <- paste0(result2$Comb2," & 1.0 ")
result3 <- aggregate(Prob ~ Model, data = res_vec, paste, collapse = " & ")


#No Changepoint

rate_list <- c(0.25, 0.5,0.75)
sample_vec <- c(100,200)
censor_vec <- c(0,0.5)
res_vec2 <- res_vec <- data.frame(array(NA, dim = c(1*length(sample_vec)*length(rate_list),2)))
f <- 1
for(i in 1:length(rate_list)){
  for(j in 1:length(sample_vec)){
    for(q in 1:length(censor_vec) ){

      current_name <- paste0("Gibbs_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
      current_name2 <- paste0("Collapsing_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
      res <- readRDS(file = paste0(path,"No Changepoint//",current_name,".rds"))

      res_vec[f,1] <-current_name
      res_vec[f,2] <-res$prob.correct

      res2 <- readRDS(file = paste0(path,"No Changepoint//",current_name2,".rds"))
      res_vec2[f,1] <-current_name2
      res_vec2[f,2] <-mean(res2$prob.mod == 0)

      f <- f +1
    }
  }
}




res_vec$X2 <- round(res_vec$X2*100,0)
res_vec2$X2 <- round(res_vec2$X2*100,0)
res_vec <- res_vec[-grep("size_200_censor_0$", res_vec$X1),]
res_vec2 <- res_vec2[-grep("size_200_censor_0$", res_vec2$X1),]

cbind(paste0(res_vec$X2," & ",res_vec2$X2))




#Simulation Read in Collapsing


path <-"C:\\Users\\phili\\OneDrive\\PhD\\R codes\\Changepoint Analysis\\Biometrics publication\\Simulation\\"


loadRData <- function(fileName){
  #loads an RData file, and returns it
  load(fileName)
  get(ls()[ls() != "fileName"])
}


rate1 <- c(0.5,0.75)
rate2 <- c(0.25,0.75)
rate3 <- c(0.75,0.5)
rate4 <- c(0.75,0.25)

rate5 <- c(0.25,0.5,0.75)
rate6 <- c(0.75,0.5,0.25)
rate7 <- c(0.75,0.2,0.75)
rate8 <- c(0.2,0.75,0.2)

time1 <- c(0.5,1)

censor_vec <- c(0,0.33)
sample_vec <- c(300,500,1000)
rate_list <- list()

for(i in 1:8){
  current_rate <-get(paste0("rate",i))
  rate_list[[i]] <- current_rate
}

res_vec <- array(NA, dim = c(length(censor_vec)*length(sample_vec)*length(rate_list),2))
res_vec <- array(NA, dim = c(1*length(sample_vec)*length(rate_list),7))
res_vec <- data.frame(res_vec)
f <- 1
for(i in 1:length(rate_list)){
  for(j in 1:length(sample_vec)){
    for(q in 1:1 ){
      #for(q in 1:length(censor_vec) ){
      current_name <- paste0("Collapsing_rate_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
      res_vec[f,1] <- current_name

      #if(j ==3 ){
      #res <- loadRData(fileName = paste0(path,"Collapsing Simulations without 1000\\",current_name,".RData"))
      #}else{
      #res <- readRDS(file = paste0(path,"Collapsing Simulations without 1000\\",current_name,".rds"))
      #}
      res <- readRDS(file = paste0(path,"Collapsing Simulations\\",current_name,".rds"))


      res_vec[f,2]<- as.numeric(res$prob.correct)
      res_vec[f,3] <- as.numeric(res$chng[1,1])
      res_vec[f,4] <- as.numeric(res$chng[1,2])
      if(nrow(res$chng)>1){
        res_vec[f,5] <- as.numeric(res$chng[2,1])
        res_vec[f,6] <- as.numeric(res$chng[2,2])
      }
      res_vec[f,7] <- i
      f <- f +1
    }
  }
}




colnames(res_vec) <- c("Name","Prob","Tau1","SE1","Tau2","SE2","Model")


res_vec <- as_tibble(res_vec) %>% mutate_if(is.numeric,round, digits = 2)
res_vec$Prob <- res_vec$Prob*100
res_vec$Comb1 <- paste0(res_vec$Tau1," (",res_vec$SE1,")")
result <- aggregate(Comb1 ~ Model, data = res_vec, paste, collapse = " & ")
#result$Comb1 <- paste0(result$Comb1," & 0.5 ")
res_vec$Comb2 <- paste0(res_vec$Tau2," (",res_vec$SE2,")")
result2 <- aggregate(Comb2 ~ Model, data = res_vec, paste, collapse = " & ")
#result2$Comb2 <- paste0(result2$Comb2," & 1.0 ")
result3 <- aggregate(Prob ~ Model, data = res_vec, paste, collapse = " & ")


#Or just write a latex table
Philip-Cooney/PiecewiseChangepoint documentation built on Sept. 10, 2023, 9:49 p.m.